library(sf)
library(tidyverse)
library(tmap)
library(RSocrata)
library(ggplot2)

Reading data from the Socrata-powered Chicago Open Data Portal. The dataset contains reported crimes for the past year to date. read.socrata downloads data from Socrata data APIs on the web.

# Download the crimes table from the Chicago Data Portal's Socrata endpoint.
crimes <- read.socrata(
  url = "https://data.cityofchicago.org/resource/x2n5-8w5q.csv"
)

Reading the community area boundaries (served as GeoJSON) from the web with st_read

# Read the community-area polygons straight from the portal's GeoJSON URL.
chicago <- st_read(
  dsn = "https://data.cityofchicago.org/resource/igwz-8jzy.geojson"
)
## Reading layer `igwz-8jzy' from data source `https://data.cityofchicago.org/resource/igwz-8jzy.geojson' using driver `GeoJSON'
## Simple feature collection with 77 features and 9 fields
## geometry type:  MULTIPOLYGON
## dimension:      XY
## bbox:           xmin: -87.94011 ymin: 41.64454 xmax: -87.52414 ymax: 42.02304
## epsg (SRID):    4326
## proj4string:    +proj=longlat +datum=WGS84 +no_defs

Plotting the Chicago community areas

# Draw only the polygon outlines (geometry column, no attributes).
plot(st_geometry(chicago))

Checking the different types of crime with unique

# List the distinct primary crime categories.
# NOTE(review): the spelling "decsription" is used consistently in this
# script and presumably mirrors the portal's field name -- confirm before
# "correcting" it.
unique(crimes[["X_primary_decsription"]])
##  [1] "THEFT"                             "DECEPTIVE PRACTICE"               
##  [3] "ROBBERY"                           "MOTOR VEHICLE THEFT"              
##  [5] "CRIMINAL DAMAGE"                   "CRIMINAL TRESPASS"                
##  [7] "BATTERY"                           "SEX OFFENSE"                      
##  [9] "NARCOTICS"                         "ASSAULT"                          
## [11] "OTHER OFFENSE"                     "WEAPONS VIOLATION"                
## [13] "PUBLIC PEACE VIOLATION"            "BURGLARY"                         
## [15] "STALKING"                          "HOMICIDE"                         
## [17] "CRIM SEXUAL ASSAULT"               "ARSON"                            
## [19] "OFFENSE INVOLVING CHILDREN"        "KIDNAPPING"                       
## [21] "INTERFERENCE WITH PUBLIC OFFICER"  "PROSTITUTION"                     
## [23] "INTIMIDATION"                      "LIQUOR LAW VIOLATION"             
## [25] "HUMAN TRAFFICKING"                 "CONCEALED CARRY LICENSE VIOLATION"
## [27] "OBSCENITY"                         "PUBLIC INDECENCY"                 
## [29] "GAMBLING"                          "OTHER NARCOTIC VIOLATION"         
## [31] "NON-CRIMINAL"                      "CRIMINAL SEXUAL ASSAULT"

Keeping only the homicides (filtering out all other crimes) with filter.

# Keep only the rows whose primary category is exactly "HOMICIDE".
homicides <- filter(crimes, X_primary_decsription == "HOMICIDE")

Checking the number of homicides with nrow

# Number of homicide records retained by the filter.
nrow(x = homicides)
## [1] 521

Converting the data frame to sf with st_as_sf and setting the crs with st_crs

# Build point geometries from the longitude/latitude columns and declare them
# as WGS84 (EPSG:4326) in the same call. Records with a missing coordinate are
# dropped first: st_as_sf() stops with an error when a coordinate column
# contains NA, and such records cannot be placed on the map anyway.
homicide_points <- homicides %>%
  filter(!is.na(longitude), !is.na(latitude)) %>%
  st_as_sf(coords = c("longitude", "latitude"), crs = 4326)

Changing the tmap mode to “view” for base map and interactivity

# Switch tmap to its interactive viewing mode.
tmap_mode(mode = "view")
## tmap mode set to interactive viewing

Plotting the community areas and homicide points with tmap functions

# Two layers: community-area outlines, then homicide locations as red dots.
tm_shape(shp = chicago) + tm_borders() +
  tm_shape(shp = homicide_points) + tm_dots(col = "red")

I will aggregate the points to community areas in order to make a choropleth map.

To start, I spatially join the community area identifier to the points.

# Spatial join: attach to each homicide point the area_num_1 of the
# community area it falls in (only that column is carried over).
comm.points <- homicide_points %>%
  st_join(y = chicago["area_num_1"])

area_num_1 is a factor, so I change it to type integer with as.integer to avoid problems later.

# Convert the community-area identifier to integer in both tables so the
# later join keys share a type. Going through as.character() first matters:
# as.integer() applied directly to a factor returns its internal level codes,
# not the numbers shown in its labels. The two-step form is also correct when
# the column is already character.
comm.points$area_num_1 <- as.integer(as.character(comm.points$area_num_1))
chicago$area_num_1 <- as.integer(as.character(chicago$area_num_1))

Next, I set the geometry of the points to NULL for convenience in making the counts.

# Drop the geometry column so comm.points is a plain table for counting.
comm.points <- st_drop_geometry(comm.points)

count tallies the number of points that share the same area_num_1 value. head shows the first few rows of the result.

# One row per area_num_1 value, with the number of points in column n.
homicide.cnts <- count(comm.points, area_num_1)
# Preview the first rows.
head(x = homicide.cnts)
## # A tibble: 6 x 2
##   area_num_1     n
##        <int> <int>
## 1          1     6
## 2          6     2
## 3          7     4
## 4          8     2
## 5          9     6
## 6         10     1

I change the names to something more meaningful with rename

# Give the key and the tally descriptive names (new_name = old_name).
homicide.cnts <- rename(
  homicide.cnts,
  comm = area_num_1,
  homicide_count = n
)
# Preview the renamed table.
head(x = homicide.cnts)
## # A tibble: 6 x 2
##    comm homicide_count
##   <int>          <int>
## 1     1              6
## 2     6              2
## 3     7              4
## 4     8              2
## 5     9              6
## 6    10              1

Now I join the counts to the original Chicago community area boundaries with left_join

# Attach the counts to the polygons, matching chicago$area_num_1 to
# homicide.cnts$comm. Areas with no matching count get NA in homicide_count.
chicago <- chicago %>%
  left_join(homicide.cnts, by = c("area_num_1" = "comm"))

I plot the homicide counts for each community area with tmap functions

# Choropleth of homicide_count with quantile class breaks and a
# blue-white-red palette, with area borders drawn on top.
tm_shape(shp = chicago) +
  tm_fill(
    "homicide_count",
    style = "quantile",
    palette = c("blue", "white", "red")
  ) +
  tm_borders()

Here I assign 0 for missing values, so I can make a density plot. This is easily done with is.na and bracket notation.

# Areas with no joined count have NA; treat them as zero homicides.
chicago$homicide_count <- replace(
  chicago$homicide_count,
  is.na(chicago$homicide_count),
  0
)

Lastly with ggplot2, I make a density plot to view the univariate distribution of homicide counts

# Kernel density estimate of the per-area homicide counts.
ggplot(data = chicago, mapping = aes(x = homicide_count)) +
  geom_density()